# Attach the packages this script relies on via pacman::p_load.
# NOTE(review): the sections visible in this file only call data.table
# functions (fread, setnames, merge, unique) plus base R; tidyverse and broom
# may be unused here -- confirm before trimming.
pacman::p_load(tidyverse, data.table, broom)
# Remove every object that ls() reports in the current environment so the
# script starts from an empty workspace. This does not detach packages or
# reset options, and it will wipe the caller's objects if the file is sourced
# into an interactive session.
rm(list=ls())
################################################################################


#################### Beef - Brazil
# Attaches, to every beef trade-flow row with a known county and a named hub,
# the county id of that hub and the matching row of the county-to-county
# distance table, then writes the result as a gzipped CSV.

# Hub crosswalk (manually produced file): hub name -> county id of the hub.
# Hubs whose name contains "UNKNOWN" are dropped; when a hub appears on
# several rows only its first row is kept.
df_c <- unique(
  fread(
    "../../data/agribusiness/clean/hubs_matched_beef_brazil.csv",
    header = TRUE
  )[!grepl("UNKNOWN", hub), .(hub, county_id_hub = county_id)],
  by = "hub"
)

# Trade flows: keep rows flagged 'county_known' whose hub is not "UNKNOWN",
# left-join the hub's county id, then keep only the columns used downstream.
df_t <- fread("../../data/agribusiness/clean/beef_brazil.csv.gz")
df_t <- df_t[id_type == "county_known"][!grepl("UNKNOWN", hub)]
df_t <- merge(df_t, df_c, by = "hub", all.x = TRUE)
df_t <- df_t[
  ,
  c(
    "county", "county_id", "amc_id", "microregion_id", "mesoregion_id",
    "state_id", "state", "region", "hub", "county_id_hub",
    "equivalent_tonnes", "year", "product_type"
  ),
  with = FALSE
]

# Distance matrix: rename origin/destination so they line up with the
# (county, hub county) pair carried by the trade flows.
df_d <- fread("../../data/shapefiles/clean/distance_county_county_brazil.csv.gz")
setnames(
  df_d,
  old = c("origin", "destination"),
  new = c("county_id", "county_id_hub")
)

# Left join: flows without a matching (county, hub county) pair are kept and
# get NA in the columns coming from the distance table.
df <- merge(df_t, df_d, by = c("county_id", "county_id_hub"), all.x = TRUE)
write.csv(
  df,
  gzfile("../../data/agribusiness/clean/beef_brazil_county_hub_distance.csv.gz"),
  row.names = FALSE
)


#################### Soybean - Brazil
# Attaches, to every soybean trade-flow row with a known county and a named
# hub, the county id of that hub and the matching row of the county-to-county
# distance table, then writes the result as a gzipped CSV.

# Hub crosswalk (manually produced file): hub name -> county id of the hub.
# Hubs whose name contains "UNKNOWN" are dropped; when a hub appears on
# several rows only its first row is kept.
df_c <- unique(
  fread(
    "../../data/agribusiness/clean/hubs_matched_soybean_brazil.csv",
    header = TRUE
  )[!grepl("UNKNOWN", hub), .(hub, county_id_hub = county_id)],
  by = "hub"
)

# Trade flows: keep rows flagged 'county_known' whose hub is not "UNKNOWN",
# left-join the hub's county id, then keep only the columns used downstream.
df_t <- fread("../../data/agribusiness/clean/soybean_brazil.csv.gz")
df_t <- df_t[id_type == "county_known"][!grepl("UNKNOWN", hub)]
df_t <- merge(df_t, df_c, by = "hub", all.x = TRUE)
df_t <- df_t[
  ,
  c(
    "county", "county_id", "amc_id", "microregion_id", "mesoregion_id",
    "state_id", "state", "region", "hub", "county_id_hub",
    "equivalent_tonnes", "year", "product_type"
  ),
  with = FALSE
]

# Distance matrix: rename origin/destination so they line up with the
# (county, hub county) pair carried by the trade flows.
df_d <- fread("../../data/shapefiles/clean/distance_county_county_brazil.csv.gz")
setnames(
  df_d,
  old = c("origin", "destination"),
  new = c("county_id", "county_id_hub")
)

# Left join: flows without a matching (county, hub county) pair are kept and
# get NA in the columns coming from the distance table.
df <- merge(df_t, df_d, by = c("county_id", "county_id_hub"), all.x = TRUE)
write.csv(
  df,
  gzfile("../../data/agribusiness/clean/soybean_brazil_county_hub_distance.csv.gz"),
  row.names = FALSE
)


#################### Maize - Brazil
# Attaches, to every maize trade-flow row with a known county and a named hub,
# the county id of that hub and the matching row of the county-to-county
# distance table, then writes the result as a gzipped CSV.

# Hub crosswalk (manually produced file): hub name -> county id of the hub.
# Hubs whose name contains "UNKNOWN" are dropped; when a hub appears on
# several rows only its first row is kept.
df_c <- unique(
  fread(
    "../../data/agribusiness/clean/hubs_matched_maize_brazil.csv",
    header = TRUE
  )[!grepl("UNKNOWN", hub), .(hub, county_id_hub = county_id)],
  by = "hub"
)

# Trade flows: keep rows flagged 'county_known' whose hub is not "UNKNOWN",
# left-join the hub's county id, then keep only the columns used downstream.
df_t <- fread("../../data/agribusiness/clean/maize_brazil.csv.gz")
df_t <- df_t[id_type == "county_known"][!grepl("UNKNOWN", hub)]
df_t <- merge(df_t, df_c, by = "hub", all.x = TRUE)
df_t <- df_t[
  ,
  c(
    "county", "county_id", "amc_id", "microregion_id", "mesoregion_id",
    "state_id", "state", "region", "hub", "county_id_hub",
    "equivalent_tonnes", "year", "product_type"
  ),
  with = FALSE
]

# Distance matrix: rename origin/destination so they line up with the
# (county, hub county) pair carried by the trade flows.
df_d <- fread("../../data/shapefiles/clean/distance_county_county_brazil.csv.gz")
setnames(
  df_d,
  old = c("origin", "destination"),
  new = c("county_id", "county_id_hub")
)

# Left join: flows without a matching (county, hub county) pair are kept and
# get NA in the columns coming from the distance table.
df <- merge(df_t, df_d, by = c("county_id", "county_id_hub"), all.x = TRUE)
write.csv(
  df,
  gzfile("../../data/agribusiness/clean/maize_brazil_county_hub_distance.csv.gz"),
  row.names = FALSE
)



#################### Soybean - Argentina
# Attaches, to every soybean trade-flow row with a known county and a named
# port, the county id of that port and the matching row of the
# county-to-county distance table, then writes the result as a gzipped CSV.

# Port crosswalk (manually produced file): port name -> county id of the port.
# Ports whose name contains "UNKNOWN" are dropped; when a port appears on
# several rows only its first row is kept.
df_c <- unique(
  fread(
    "../../data/agribusiness/clean/ports_matched_soybean_argentina.csv",
    header = TRUE
  )[!grepl("UNKNOWN", port), .(port, county_id_port = county_id)],
  by = "port"
)

# Trade flows: keep rows flagged 'county_known' whose port is not "UNKNOWN",
# left-join the port's county id, then keep only the columns used downstream.
df_t <- fread("../../data/agribusiness/clean/soybean_argentina.csv.gz")
df_t <- df_t[id_type == "county_known"][!grepl("UNKNOWN", port)]
df_t <- merge(df_t, df_c, by = "port", all.x = TRUE)
df_t <- df_t[
  ,
  c(
    "county", "county_id", "state_id", "state", "region", "port",
    "county_id_port", "equivalent_tonnes", "year", "product_type"
  ),
  with = FALSE
]

# Distance matrix: rename origin/destination so they line up with the
# (county, port county) pair carried by the trade flows.
df_d <- fread("../../data/shapefiles/clean/distance_county_county_argentina.csv.gz")
setnames(
  df_d,
  old = c("origin", "destination"),
  new = c("county_id", "county_id_port")
)

# Left join: flows without a matching (county, port county) pair are kept and
# get NA in the columns coming from the distance table.
df <- merge(df_t, df_d, by = c("county_id", "county_id_port"), all.x = TRUE)
write.csv(
  df,
  gzfile("../../data/agribusiness/clean/soybean_argentina_county_port_distance.csv.gz"),
  row.names = FALSE
)



